Libs



In [64]:

    
import re
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
from datetime import date, timedelta,datetime
import scipy
import os
import math
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline



In [88]:

    
def calc_score(self):
        """
        Invented in 1927 by Edwin B. Wilson
        (http://www.evanmiller.org/how-not-to-sort-by-average-rating.html)

        ****
        Deprecated, as this calculation is moved into SQL
        ****
        """
        n = self.allvotes()
        if n == 0:
            return 0
        pos = self.upvotes_count()
        z = 1.96
        p = 1.0 * pos / n
        score = (p + z * z / (2 * n) - z * sqrt((p * (1 - p) + z * z / (4 * n)) / n)) / (1 + z * z / n)
        return score



In [105]:

    
def calc_score(num_sents,positive):
        """
        Invented in 1927 by Edwin B. Wilson
        (http://www.evanmiller.org/how-not-to-sort-by-average-rating.html)

        ****
        Deprecated, as this calculation is moved into SQL
        ****
        """
        n = num_sents
        if n == 0:
            return 0
        pos = positive
        z = 1.96
        p = 1.0 * pos / n
        score = (p + z * z / (2 * n) - z * sqrt((p * (1 - p) + z * z / (4 * n)) / n)) / (1 + z * z / n)
        return score



In [106]:

    
#wilson=calc_score()
calc_score(10,3)









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-106-ec889281165a> in <module>()
      1 #wilson=calc_score()
----> 2 calc_score(10,3)

<ipython-input-105-cced03bc4506> in calc_score(num_sents, positive)
     14         z = 1.96
     15         p = 1.0 * pos / n
---> 16         score = (p + z * z / (2 * n) - z * sqrt((p * (1 - p) + z * z / (4 * n)) / n)) / (1 + z * z / n)
     17         return score

NameError: global name 'sqrt' is not defined



In [94]:

    
# location of data
sent_results=pd.read_csv('../../output/data.csv')

Plot polarity (no normalization)



In [82]:

    
plt.plot(sent_results['paragraph'],sent_results[' polarity'])
plt.xlabel('paragraph')
plt.ylabel('polarity')









    Out[82]:





<matplotlib.text.Text at 0xeffc0f0>

Plot polarity (normalized by 'count')



In [83]:

    
plt.plot(sent_results['paragraph'],sent_results[' polarity']/sent_results[' count'])
plt.xlabel('paragraph')
plt.ylabel('polarity (normalised)')









    Out[83]:





<matplotlib.text.Text at 0x1041aac8>

Plot positive and negative word counts



In [86]:

    
plt.plot(sent_results['paragraph'],sent_results[' negative'],label='negative')
plt.plot(sent_results['paragraph'],sent_results[' positive'],label='positive')
plt.legend()
plt.xlabel('paragraph')









    Out[86]:





<matplotlib.text.Text at 0x10a365f8>

Plot positive and negative word counts (normalized by 'count')



In [85]:

    
plt.plot(sent_results['paragraph'],sent_results[' negative']/sent_results[' count'],label='negative')
plt.plot(sent_results['paragraph'],sent_results[' positive']/sent_results[' count'],label='positive')
plt.legend()
plt.xlabel('paragraph')









    Out[85]:





<matplotlib.text.Text at 0x1093a358>



In [87]:

    
sent_results



In [ ]:

	paragraph	count	polarity	positive	negative
0	0	607	0.125000	9	7
1	1	1743	-0.176471	7	10
2	2	1625	0.043478	12	11
3	3	394	0.250000	5	3
4	4	984	0.157895	11	8
5	5	547	0.600000	8	2
6	6	799	0.230769	8	5
7	7	1276	0.750000	14	2
8	8	961	0.428571	10	4
9	9	554	-0.333333	2	4
10	10	1010	0.647059	14	3
11	11	419	0.000000	4	4
12	12	1245	-0.125000	7	9
13	13	595	0.333333	10	5
14	14	3472	0.155556	26	19
15	15	433	0.333333	4	2
16	16	362	-1.000000	0	4
17	17	243	1.000000	3	0
18	18	123	-0.999999	0	1
19	19	1144	0.000000	7	7
20	20	878	0.363636	15	7
21	21	506	0.142857	4	3
22	22	762	-0.272727	4	7
23	23	669	0.111111	5	4
24	24	316	-0.250000	3	5
25	25	287	-0.999999	0	1
26	26	243	-0.333333	1	2
27	27	326	0.166667	7	5
28	28	415	-0.571429	3	11
29	29	700	0.066667	8	7
30	30	277	1.000000	3	0
31	31	143	-0.333333	1	2
32	32	240	-0.200000	2	3
33	33	312	0.000000	3	3
34	34	308	-0.428571	2	5
35	35	507	0.200000	6	4
36	36	647	0.000000	5	5
37	37	675	-0.384615	4	9
38	38	414	0.636364	9	2
39	39	204	0.500000	3	1
40	40	241	-0.400000	3	7
41	41	332	0.400000	7	3
42	42	482	0.166667	7	5
43	43	168	0.333333	2	1
44	44	447	-0.200000	2	3
45	45	405	0.428571	5	2
46	46	124	1.000000	2	0